map <-
read_csv("data/HydroWASTE_v10.csv")
## Rows: 58502 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): WWTP_NAME, COUNTRY, CNTRY_ISO, STATUS, LEVEL
## dbl (20): WASTE_ID, SOURCE, ORG_ID, LAT_WWTP, LON_WWTP, QUAL_LOC, LAT_OUT, L...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(map) |>
knitr::kable()
| WASTE_ID | SOURCE | ORG_ID | WWTP_NAME | COUNTRY | CNTRY_ISO | LAT_WWTP | LON_WWTP | QUAL_LOC | LAT_OUT | LON_OUT | STATUS | POP_SERVED | QUAL_POP | WASTE_DIS | QUAL_WASTE | LEVEL | QUAL_LEVEL | DF | HYRIV_ID | RIVER_DIS | COAST_10KM | COAST_50KM | DESIGN_CAP | QUAL_CAP |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1 | 1140441 | Akmenes aglomeracija | Lithuania | LTU | 56.247 | 22.726 | 2 | 56.223 | 22.627 | Not Reported | 1060 | 2 | 148.213 | 4 | Advanced | 1 | 2421.974 | 20228874 | 4.153 | 0 | 0 | 4600 | 2 |
| 2 | 1 | 1140443 | Alytaus m aglomeracija | Lithuania | LTU | 54.432 | 24.056 | 2 | 54.519 | 24.098 | Not Reported | 87900 | 2 | 8797.904 | 1 | Advanced | 1 | 2534.527 | 20261585 | 257.983 | 0 | 0 | 220000 | 2 |
| 3 | 1 | 1140445 | Anyksciu aglomeracija | Lithuania | LTU | 55.509 | 25.073 | 2 | 55.452 | 25.006 | Not Reported | 12400 | 2 | 1959.285 | 1 | Advanced | 1 | 1367.809 | 20243105 | 30.995 | 0 | 0 | 33000 | 2 |
| 4 | 1 | 1140447 | Ariogalos aglomeracija | Lithuania | LTU | 55.252 | 23.484 | 2 | 55.210 | 23.510 | Not Reported | 2500 | 2 | 578.482 | 1 | Secondary | 1 | 2061.969 | 20247446 | 13.799 | 0 | 0 | 4357 | 2 |
| 5 | 1 | 1140449 | Baisogalos aglomeracija | Lithuania | LTU | 55.644 | 23.741 | 2 | 55.681 | 23.835 | Not Reported | 1200 | 2 | 167.788 | 4 | Secondary | 1 | 209.549 | 20239330 | 0.405 | 0 | 0 | 1490 | 2 |
| 6 | 1 | 1140451 | Birstono Prienu aglomeracija | Lithuania | LTU | 54.623 | 24.062 | 2 | 54.715 | 24.094 | Not Reported | 12400 | 2 | 2239.471 | 1 | Advanced | 1 | 10366.240 | 20256987 | 268.665 | 0 | 0 | 19000 | 2 |
wwtp <-
read_csv("data/SARS-CoV-2_concentrations_measured_in_NYC_Wastewater_20231129.csv")
## Rows: 4270 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Sample Date, Test date, WRRF Name, WRRF Abbreviation, Annotation, T...
## num (3): Concentration SARS-CoV-2 gene target (N1 Copies/L), Per capita SARS...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(wwtp) |>
knitr::kable()
| Sample Date | Test date | WRRF Name | WRRF Abbreviation | Concentration SARS-CoV-2 gene target (N1 Copies/L) | Per capita SARS-CoV-2 load (N1 copies per day per population) | Annotation | Population Served, estimated | Technology |
|---|---|---|---|---|---|---|---|---|
| 08/31/2020 | 09/01/2020 | 26th Ward | 26W | 389 | 263535.6 | Concentration below Method Limit of Quantification (above Method Limit of Detection) | 290608 | RT-qPCR |
| 08/31/2020 | 09/01/2020 | Bowery Bay | BB | 1204 | 443632.9 | NA | 924695 | RT-qPCR |
| 08/31/2020 | 09/01/2020 | Coney Island | CI | 304 | 168551.6 | Concentration below Method Limit of Quantification (above Method Limit of Detection) | 682342 | RT-qPCR |
| 08/31/2020 | 09/01/2020 | Hunts Point | HP | 940 | 574446.6 | NA | 755948 | RT-qPCR |
| 08/31/2020 | 09/01/2020 | Jamaica Bay | JA | 632 | 233077.7 | NA | 748737 | RT-qPCR |
| 08/31/2020 | 09/01/2020 | Newtown Creek | NC | 197 | 122396.8 | Concentration below Method Limit of Quantification (above Method Limit of Detection) | 1156473 | RT-qPCR |
map_new <-
map |>
janitor::clean_names() |>
select(waste_id, wwtp_name, lat_wwtp, lon_wwtp) |>
mutate(wrrf_abbreviation = recode( wwtp_name,
"New York C Rockaway WPCP" = "RK",
"New York C Red Hook WPCP" = "RH",
"New York C Port Richmond WPCP" = "PR",
"New York C Oakwood Beach WPCP" = "OB",
"New York C 26th Ward WPCP" = "26W",
"New York C Tallman Island WPCP" = "TI",
"New York C North River WPCP" = "NR",
"New York C Coney Island WPCP" = "CI",
"New York C Jamaica WPCP" = "JA",
"New York C Hunts Point WPCP" = "HP",
"New York C Owls Head WPCP" = "OH",
"New York C Bowery Bay WPCP" = "BB",
"New York C Newtown Creek WPCP" = "NC",
"New York C Wards Island WPCP" = "WI",
))
wwtp_cleaned <-
wwtp |>
janitor:: clean_names()
merge_df <- inner_join(map_new, wwtp_cleaned, by = "wrrf_abbreviation")
nyc_wwtp <-
merge_df |>
rename(concentration = concentration_sars_co_v_2_gene_target_n1_copies_l) |>
drop_na(concentration) |>
separate(sample_date, into = c("month", "day", "year"), convert = TRUE) %>%
mutate(
year = as.character(year),
month = factor(month, levels = 1:12),
month = recode(month,
"1" = "January",
"2" = "February",
"3" = "March",
"4" = "April",
"5" = "May",
"6" = "June",
"7" = "July",
"8" = "August",
"9" = "September",
"10" = "October",
"11" = "November",
"12" = "December")) |>
select(-waste_id, -test_date, -per_capita_sars_co_v_2_load_n1_copies_per_day_per_population, -population_served_estimated, -wwtp_name)
nyc_wwtp
## # A tibble: 4,129 × 10
## lat_wwtp lon_wwtp wrrf_abbreviation month day year wrrf_name concentration
## <dbl> <dbl> <chr> <fct> <int> <chr> <chr> <dbl>
## 1 40.8 -73.9 WI Augu… 31 2020 Wards Is… 1339
## 2 40.8 -73.9 WI Sept… 8 2020 Wards Is… 1065
## 3 40.8 -73.9 WI Sept… 13 2020 Wards Is… 394
## 4 40.8 -73.9 WI Sept… 15 2020 Wards Is… 316
## 5 40.8 -73.9 WI Sept… 20 2020 Wards Is… 1250
## 6 40.8 -73.9 WI Octo… 6 2020 Wards Is… 1152
## 7 40.8 -73.9 WI Octo… 11 2020 Wards Is… 600
## 8 40.8 -73.9 WI Octo… 13 2020 Wards Is… 2066
## 9 40.8 -73.9 WI Octo… 18 2020 Wards Is… 444
## 10 40.8 -73.9 WI Octo… 20 2020 Wards Is… 3026
## # ℹ 4,119 more rows
## # ℹ 2 more variables: annotation <chr>, technology <chr>
Creating the dataset for an overall trend for Covid-19 concentration across the 14 waste water facilities within New York State from 2020 to 2023.
overall_trend <-
nyc_wwtp |>
filter(technology == "RT-qPCR") |>
select(-lat_wwtp, -lon_wwtp, -annotation) |>
group_by(year, month) |>
summarise(avg_conc = mean(concentration)) |>
unite("year_month",year, month, sep = "_") |>
mutate(Month = ymd(paste0(year_month, "_01")))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
Plotting Scatterplot
overall_trend |>
ggplot(aes(x = Month, y = avg_conc)) +
geom_point(color = c("#FFA500")) +
geom_smooth(se = FALSE, color = "dark grey") +
labs(
x = "Time",
y = "Monthly average Covid-19 concentration in waste water in NY (N/L)") +
theme(axis.line = element_line(color = "grey"),
panel.background = element_blank(),
legend.position = "none",
panel.grid.major = element_line(color = "light grey", linetype = "dashed"),
plot.title = element_text(hjust = 0.5)) +
ggtitle("Monthly average Covid-19 concentration in waste water in NY measured from 2020 to 2023")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Histogram
overall_trend |>
ggplot(aes(x = year_month, y = avg_conc)) +
geom_col(position = "dodge") +
stat_summary(fun = mean, geom = "line", aes(group = 1), color = "orange", size = 1) +
labs(x = "Year-Month", y = "Average Concentration") +
theme(axis.text.x = element_text(angle = 60, hjust = 1, color = "black"))
Sub dataset
rtqpcr <-
nyc_wwtp |>
filter(year %in% c(2021, 2022), technology == "RT-qPCR") |>
select(-lat_wwtp, -lon_wwtp, -annotation)
dpcr <-
nyc_wwtp |>
filter(year %in% c(2021, 2022), technology == "dPCR") |>
select(-lat_wwtp, -lon_wwtp, -annotation)
dpcr is not large enough to do data analysis.
data analysis
trend_plot_2021 <-
rtqpcr |>
filter(year == 2021) |>
group_by(month, wrrf_name) |>
summarise(avg_conc = mean(concentration)) |>
ggplot(aes(x = month, y = avg_conc, color = wrrf_name, group = wrrf_name)) +
geom_line(alpha = .5) +
geom_point(alpha = .5) +
labs(x = "Month",
y = "Average Covid-19 Concentration of each waste water facility",
title = "Average Covid-19 Concentration of each waste water facility in 2021 in New York State") +
theme(legend.position = "bottom",
legend.box.background = element_rect())
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
trend_plot_2021
trend_plot_2022
trend_plot_2022 <-
rtqpcr |>
filter(year == 2022) |>
group_by(month, wrrf_name) |>
summarise(avg_conc = mean(concentration)) |>
ggplot(aes(x = month, y = avg_conc, color = wrrf_name, group = wrrf_name)) +
geom_line(alpha = .5) +
geom_point(alpha = .5) +
labs(x = "Month",
y = "Average Covid-19 Concentration of each waste water facility",
title = "Average Covid-19 Concentration of each waste water facility in 2022 in New York State") +
theme(legend.position = "bottom",
legend.box.background = element_rect())
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
trend_plot_2022
tables 2021
box_2021 <- rtqpcr %>%
filter(year == 2021) %>%
group_by(month, wrrf_name) %>%
summarise(avg_conc = mean(concentration)) %>%
plot_ly(
x = ~avg_conc,
y = ~wrrf_name,
type = "box",
color = ~wrrf_name,
colors = "viridis"
) %>%
layout(
xaxis = list(title = "Mean concentration (N/L)"),
yaxis = list(title = "Area"),
showlegend = FALSE
)
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
box_2021
tables 2022
box_2022 <- rtqpcr %>%
filter(year == 2022) %>%
group_by(month, wrrf_name) %>%
summarise(avg_conc = mean(concentration)) %>%
plot_ly(
x = ~avg_conc,
y = ~wrrf_name,
type = "box",
color = ~wrrf_name,
colors = "viridis"
) %>%
layout(
xaxis = list(title = "Mean concentration (N/L)"),
yaxis = list(title = "Area"),
showlegend = FALSE
)
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
box_2021
nyc_wwtp |>
filter(year == 2022, technology == "RT-qPCR") |>
group_by(month, wrrf_abbreviation) |>
ggplot(aes(x = month, y = concentration, color = wrrf_abbreviation)) +
geom_point(alpha = .5)
library(leaflet)
wwtp_map <-
nyc_wwtp |>
leaflet() |>
addTiles() |>
addMarkers(
~lon_wwtp, ~lat_wwtp
)
wwtp_map
14 wwtps map
##Questions 1 The concentration of covid for each station on average 2 literature review foe two diff tech (RT-qPCR, dPCR) 3 Year 2021 and 2022